Demonstrating Stock Price Prediction methods to identify the trade across tech firms - Intel, Nvidia, AMD. The data is caputed across 3 years starting from March 2019 to March 2022. The data is extracted from Yahoo Finance.

Columns extracted from Yahoo Finance:

1.“Date” : Trading date of a stock 2.“Open” : Opening price of a stock as per the day 3.“High” : High is a stock’s highest trading price of the day 4.“Low” : Low is a stock’s lowest trading price of the day 5.“Close” : Closing price of a stock 6.“Adj.Close” : The closing price after adjustments for all applicable splits and dividend distributions. 7.“Volume” : Volume measures the number of a stock’s shares that are traded on a stock exchange in a day or a period of time

The data is captured at day level for all the tech firms

####Installing required packages

library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 4.1.2
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.1.2
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
library(tseries)
library(timeSeries)
## Loading required package: timeDate
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:timeSeries':
## 
##     filter, lag
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(fGarch)
## Loading required package: fBasics
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:TTR':
## 
##     volatility
library(xts)
nvidia <- getSymbols("NVDA", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
amd <- getSymbols("AMD", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
intel <- getSymbols("INTC", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)

indf_data1 <- Cl(nvidia)
indf_data2 <- Cl(amd)
indf_data3 <- Cl(intel)

Nvidia stock market plot

chart_Series(indf_data1, col = "black")

add_SMA(n = 100, on = 1, col = "red")

add_SMA(n = 20, on = 1, col = "black")

add_RSI(n = 14, maType = "SMA")

add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)

add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)

AMD stock market plot

chart_Series(indf_data2, col = "black")

add_SMA(n = 100, on = 1, col = "red")

add_SMA(n = 20, on = 1, col = "black")

add_RSI(n = 14, maType = "SMA")

add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)

add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)

Intel stock market plot

chart_Series(indf_data3, col = "black")

add_SMA(n = 100, on = 1, col = "red")

add_SMA(n = 20, on = 1, col = "black")

add_RSI(n = 14, maType = "SMA")

add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)

add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)

indf_log1 <- log(indf_data1)
indf_log2 <- log(indf_data2)
indf_log3 <- log(indf_data3)
head(indf_log1, n = 2)
##            NVDA.Close
## 2019-03-11   3.695979
## 2019-03-12   3.704507
tail(indf_log1, n = 2)
##            NVDA.Close
## 2021-03-05   4.825229
## 2021-03-08   4.753008
head(indf_log2, n = 2)
##            AMD.Close
## 2019-03-11  3.133754
## 2019-03-12  3.156575
tail(indf_log2, n = 2)
##            AMD.Close
## 2021-03-05  4.363353
## 2021-03-08  4.303524
head(indf_log3, n = 2)
##            INTC.Close
## 2019-03-11   3.976874
## 2019-03-12   3.980989
tail(indf_log3, n = 2)
##            INTC.Close
## 2021-03-05   4.106602
## 2021-03-08   4.091841
plot(indf_log1, main = "log Nvidia chart")

plot(indf_log2, main = "log AMD chart")

plot(indf_log3, main = "log Intel chart")

acf_log1 <- acf(indf_log1, lag.max = 320)

acf_log2 <- acf(indf_log2, lag.max = 320)

acf_log3 <- acf(indf_log3, lag.max = 320)

pacf_log1 <- pacf(indf_log1, lag.max = 320)

pacf_log2 <- pacf(indf_log2, lag.max = 320)

pacf_log3 <- pacf(indf_log3, lag.max = 320)

indf_diff1 <- diff(indf_log1, lag = 1)
indf_diff1 <- na.locf(indf_diff1, na.rm = TRUE,fromLast = TRUE)
indf_diff2 <- diff(indf_log2, lag = 1)
indf_diff2 <- na.locf(indf_diff2, na.rm = TRUE,fromLast = TRUE)
indf_diff3 <- diff(indf_log3, lag = 1)
indf_diff3 <- na.locf(indf_diff3, na.rm = TRUE,fromLast = TRUE)
plot(indf_diff1)

plot(indf_diff2)

plot(indf_diff3)

As the log data is not stationary, we should difference at certain lag for it to be stationary.

Augmented Dickey Fuller Test

adf1 <- adf.test(indf_log1, alternative = c("stationary", "explosive"), k = 0)
adf2 <- adf.test(indf_log2, alternative = c("stationary", "explosive"), k = 0)
adf3 <- adf.test(indf_log3, alternative = c("stationary", "explosive"), k = 0)
adf1
## 
##  Augmented Dickey-Fuller Test
## 
## data:  indf_log1
## Dickey-Fuller = -2.4229, Lag order = 0, p-value = 0.3993
## alternative hypothesis: stationary
adf2
## 
##  Augmented Dickey-Fuller Test
## 
## data:  indf_log2
## Dickey-Fuller = -2.9462, Lag order = 0, p-value = 0.1778
## alternative hypothesis: stationary
adf3
## 
##  Augmented Dickey-Fuller Test
## 
## data:  indf_log3
## Dickey-Fuller = -2.8627, Lag order = 0, p-value = 0.2132
## alternative hypothesis: stationary
library(caTools)
train_data1 <- indf_diff1[1:365]
train_data2 <- indf_diff2[1:365]
train_data3 <- indf_diff3[1:365]

library(forecast)
set.seed(123)
arima_model1 <- auto.arima(train_data1, stationary = TRUE, ic = c("aicc", "aic", "bic"), 
                          trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2) with non-zero mean : -1502.19
##  ARIMA(0,0,0) with non-zero mean : -1475.995
##  ARIMA(1,0,0) with non-zero mean : -1500.485
##  ARIMA(0,0,1) with non-zero mean : -1494.591
##  ARIMA(0,0,0) with zero mean     : -1474.638
##  ARIMA(1,0,2) with non-zero mean : -1505.109
##  ARIMA(0,0,2) with non-zero mean : -1508.164
##  ARIMA(0,0,3) with non-zero mean : -1506.108
##  ARIMA(1,0,1) with non-zero mean : -1501.82
##  ARIMA(1,0,3) with non-zero mean : Inf
##  ARIMA(0,0,2) with zero mean     : -1506.2
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(0,0,2) with non-zero mean : -1508.038
## 
##  Best model: ARIMA(0,0,2) with non-zero mean
arima_model2 <- auto.arima(train_data2, stationary = TRUE, ic = c("aicc", "aic", "bic"), 
                          trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2) with non-zero mean : -1388.553
##  ARIMA(0,0,0) with non-zero mean : -1379.434
##  ARIMA(1,0,0) with non-zero mean : -1390.582
##  ARIMA(0,0,1) with non-zero mean : -1388.651
##  ARIMA(0,0,0) with zero mean     : -1378.017
##  ARIMA(2,0,0) with non-zero mean : -1391.91
##  ARIMA(3,0,0) with non-zero mean : -1388.903
##  ARIMA(2,0,1) with non-zero mean : -1389.856
##  ARIMA(1,0,1) with non-zero mean : -1390.969
##  ARIMA(3,0,1) with non-zero mean : -1387.702
##  ARIMA(2,0,0) with zero mean     : -1390.157
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(2,0,0) with non-zero mean : -1393.154
## 
##  Best model: ARIMA(2,0,0) with non-zero mean
arima_model3 <- auto.arima(train_data3, stationary = TRUE, ic = c("aicc", "aic", "bic"), 
                          trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2) with non-zero mean : -1588.275
##  ARIMA(0,0,0) with non-zero mean : -1543.129
##  ARIMA(1,0,0) with non-zero mean : -1581.114
##  ARIMA(0,0,1) with non-zero mean : -1571.65
##  ARIMA(0,0,0) with zero mean     : -1545.126
##  ARIMA(1,0,2) with non-zero mean : -1590.359
##  ARIMA(0,0,2) with non-zero mean : -1593.295
##  ARIMA(0,0,3) with non-zero mean : -1591.361
##  ARIMA(1,0,1) with non-zero mean : -1580.727
##  ARIMA(1,0,3) with non-zero mean : -1595.483
##  ARIMA(2,0,3) with non-zero mean : -1592.761
##  ARIMA(1,0,4) with non-zero mean : -1594.294
##  ARIMA(0,0,4) with non-zero mean : -1590.563
##  ARIMA(2,0,4) with non-zero mean : -1590.985
##  ARIMA(1,0,3) with zero mean     : -1597.513
##  ARIMA(0,0,3) with zero mean     : -1593.39
##  ARIMA(1,0,2) with zero mean     : -1592.383
##  ARIMA(2,0,3) with zero mean     : -1594.831
##  ARIMA(1,0,4) with zero mean     : -1596.333
##  ARIMA(0,0,2) with zero mean     : -1595.311
##  ARIMA(0,0,4) with zero mean     : -1592.598
##  ARIMA(2,0,2) with zero mean     : -1590.304
##  ARIMA(2,0,4) with zero mean     : -1593.038
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,3) with zero mean     : -1597.997
## 
##  Best model: ARIMA(1,0,3) with zero mean
summary(arima_model1)
## Series: train_data1 
## ARIMA(0,0,2) with non-zero mean 
## 
## Coefficients:
##           ma1     ma2    mean
##       -0.2425  0.2133  0.0031
## s.e.   0.0512  0.0518  0.0015
## 
## sigma^2 = 0.0009268:  log likelihood = 758.07
## AIC=-1508.15   AICc=-1508.04   BIC=-1492.55
## 
## Training set error measures:
##                        ME       RMSE        MAE  MPE MAPE      MASE
## Training set -9.84976e-06 0.03031723 0.02099743 -Inf  Inf 0.6011294
##                       ACF1
## Training set -0.0008235515
summary(arima_model2)
## Series: train_data2 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##           ar1     ar2    mean
##       -0.1728  0.1034  0.0035
## s.e.   0.0520  0.0520  0.0017
## 
## sigma^2 = 0.00127:  log likelihood = 700.63
## AIC=-1393.27   AICc=-1393.15   BIC=-1377.67
## 
## Training set error measures:
##                        ME       RMSE        MAE  MPE MAPE      MASE
## Training set -2.73513e-06 0.03548796 0.02496617 -Inf  Inf 0.6317874
##                      ACF1
## Training set -0.001178684
summary(arima_model3)
## Series: train_data3 
## ARIMA(1,0,3) with zero mean 
## 
## Coefficients:
##           ar1     ma1      ma2     ma3
##       -0.8227  0.5472  -0.0078  0.2928
## s.e.   0.0758  0.0835   0.0605  0.0494
## 
## sigma^2 = 0.0007219:  log likelihood = 804.08
## AIC=-1598.16   AICc=-1598   BIC=-1578.66
## 
## Training set error measures:
##                         ME    RMSE        MAE MPE MAPE      MASE        ACF1
## Training set -0.0002327006 0.02672 0.01696816 Inf  Inf 0.6269938 -0.01336257
checkresiduals(arima_model1) #diagnostic checking

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,2) with non-zero mean
## Q* = 29.266, df = 7, p-value = 0.0001294
## 
## Model df: 3.   Total lags used: 10
checkresiduals(arima_model2) #diagnostic checking

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 19.422, df = 7, p-value = 0.006963
## 
## Model df: 3.   Total lags used: 10
checkresiduals(arima_model3) #diagnostic checking

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,3) with zero mean
## Q* = 17.708, df = 6, p-value = 0.007005
## 
## Model df: 4.   Total lags used: 10
arima1 <- arima(train_data1, order = c(0, 0, 2))
arima2 <- arima(train_data2, order = c(2, 0, 0))
arima3 <- arima(train_data3, order = c(1, 0, 3))
summary(arima1)
## 
## Call:
## arima(x = train_data1, order = c(0, 0, 2))
## 
## Coefficients:
##           ma1     ma2  intercept
##       -0.2425  0.2133     0.0031
## s.e.   0.0512  0.0518     0.0015
## 
## sigma^2 estimated as 0.0009191:  log likelihood = 758.07,  aic = -1508.15
## 
## Training set error measures:
##                        ME       RMSE        MAE  MPE MAPE      MASE
## Training set -9.84976e-06 0.03031723 0.02099743 -Inf  Inf 0.6011294
##                       ACF1
## Training set -0.0008235515
summary(arima2)
## 
## Call:
## arima(x = train_data2, order = c(2, 0, 0))
## 
## Coefficients:
##           ar1     ar2  intercept
##       -0.1728  0.1034     0.0035
## s.e.   0.0520  0.0520     0.0017
## 
## sigma^2 estimated as 0.001259:  log likelihood = 700.63,  aic = -1393.27
## 
## Training set error measures:
##                        ME       RMSE        MAE  MPE MAPE      MASE
## Training set -2.73513e-06 0.03548796 0.02496617 -Inf  Inf 0.6317874
##                      ACF1
## Training set -0.001178684
summary(arima3)
## 
## Call:
## arima(x = train_data3, order = c(1, 0, 3))
## 
## Coefficients:
##           ar1     ma1      ma2     ma3  intercept
##       -0.8228  0.5472  -0.0078  0.2928    -0.0002
## s.e.   0.0758  0.0835   0.0605  0.0494     0.0014
## 
## sigma^2 estimated as 0.0007139:  log likelihood = 804.09,  aic = -1596.19
## 
## Training set error measures:
##                         ME       RMSE        MAE MPE MAPE      MASE        ACF1
## Training set -1.366658e-05 0.02671909 0.01697492 Inf  Inf 0.6272434 -0.01337021
forecast1 <- forecast(arima1, h = 100)
forecast2 <- forecast(arima2, h = 100)
forecast3 <- forecast(arima3, h = 100)
plot(forecast1)

plot(forecast2)

plot(forecast3)

forecast_ori1 <- forecast(arima1, h = 200)
a <- ts(indf_log1)
forecast_ori1 %>% autoplot() + autolayer(a)

forecast_ori2 <- forecast(arima2, h = 100)
a <- ts(indf_log2)
forecast_ori2 %>% autoplot() + autolayer(a)

forecast_ori3 <- forecast(arima3, h = 100)
a <- ts(indf_log3)
forecast_ori3 %>% autoplot() + autolayer(a)

##Implementing STL Approach for Stock Price Forecasting

nvidia <- getSymbols("NVDA", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
amd <- getSymbols("AMD", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
intel <- getSymbols("INTC", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)

indf_data1 <- Cl(nvidia)
indf_data2 <- Cl(amd)
indf_data3 <- Cl(intel)
freq <- 7
adj <- ts(indf_data1, frequency = freq)

whole.periods <- floor(nrow(adj) / freq)
partial.periods <- nrow(adj) %% freq

desired.test <- 3
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
if(partial.periods < desired.test){
  training.end.row <- whole.periods
  training.end.col <- freq - (desired.test - partial.periods)
}
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))

fit.stl <- stl(training.ts[,1], s.window = "period")

plot(fit.stl, main="STL Decomposition")

forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test)
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")

accuracy(forecasted.adj, testing.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  0.0002197383 2.461269 1.677249 -0.1328148 2.142144 0.4189962
## Test set     -8.9809265685 9.231201 8.980927 -6.6214605 6.621461 2.2435392
##                      ACF1 Theil's U
## Training set -0.005841008        NA
## Test set     -0.111703355  2.454408
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))

freq <- 7
adj <- ts(indf_data2, frequency = freq)

whole.periods <- floor(nrow(adj) / freq)
partial.periods <- nrow(adj) %% freq

desired.test <- 3
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
if(partial.periods < desired.test){
  training.end.row <- whole.periods
  training.end.col <- freq - (desired.test - partial.periods)
}
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))

fit.stl <- stl(training.ts[,1], s.window = "period")

plot(fit.stl, main="STL Decomposition")

forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test)
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")

accuracy(forecasted.adj, testing.ts)
##                        ME     RMSE      MAE         MPE     MAPE      MASE
## Training set  0.000157209 1.821370 1.235350 -0.08391814 2.360181 0.3817300
## Test set     -2.664573613 3.055589 2.664574 -3.19070193 3.190702 0.8233679
##                      ACF1 Theil's U
## Training set -0.006049113        NA
## Test set     -0.010945417 0.9274687
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))

freq <- 7
adj <- ts(indf_data3, frequency = freq)

whole.periods <- floor(nrow(adj) / freq)
partial.periods <- nrow(adj) %% freq

desired.test <- 3
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
if(partial.periods < desired.test){
  training.end.row <- whole.periods
  training.end.col <- freq - (desired.test - partial.periods)
}
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))

fit.stl <- stl(training.ts[,1], s.window = "period")

plot(fit.stl, main="STL Decomposition")

forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test, lambda = 4)
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")

accuracy(forecasted.adj, testing.ts)
##                       ME     RMSE       MAE          MPE     MAPE      MASE
## Training set  0.01763849 1.408657 0.8949262 -0.006897264 1.672304 0.3597486
## Test set     -1.58380541 1.887049 1.5838054 -2.611414023 2.611414 0.6366690
##                     ACF1 Theil's U
## Training set -0.01084741        NA
## Test set     -0.15526621  1.078559
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))